# Table_A11.R

# Part of the replication archive for 
#
#   Bullock, John G., and Kelly Rader. 2021. "Response Options and the 
#   Measurement of Political Knowledge." Forthcoming in the British Journal 
#   of Political Science.


library(Bullock)
library(dplyr)   # for %>%, filter(), mutate(), select(), etc.
library(Hmisc)   # for llist()
library(readr)   # for parse_factor(), a better version of factor()
library(survey)  # for svyby(), svydesign(), svymean()
library(tibble)  # for tibble(), used to build dataToPlot
library(tidyr)   # for unite()

# Whether the survey weights are passed to svydesign() below. When FALSE, 
# the design object is built with weights = NULL.
USE_WEIGHTS <- TRUE

# Run the coding script. NOTE(review): presumably this is what defines 
# originalData and the *_correct, *_condition, and *_numROs variables used 
# below, and it may read USE_WEIGHTS -- confirm against the sourced file 
# before reordering these statements.
source(here::here("R/SSI_2017_coding.R"))
# Path stem passed to latexTablePDF() as outputFilenameStem at the end of 
# this script.
filenameStem <- here::here("float_output/Table_A11")
# NOTE(review): PDFtitle is not referenced again in this script (writePDF is 
# FALSE below) -- confirm whether it is still needed.
PDFtitle     <- 'Range of Percentage Answering Correctly for Each Question'



# **************************************************************************
# CREATE THE DATA FRAME ####
# **************************************************************************
# List of dummy variables that indicate whether someone answered correctly.
# The list has 18 entries. Entry i is paired, by position, with entry i of 
# conditionVars when percentCorrect is computed below, so the order here 
# must match the order of conditionVars. The six five-condition items are 
# listed twice: once to be split by their *_condition variable and once to 
# be split by their *_numROs variable.
correctVars <- llist(  

  # Items for which we had five different response-option conditions.
  # (Positions 1-6: paired with the *_condition variables.)
  ChiefJustice_correct,
  HowManyJusticesCurrently_correct,
  HowManyJusticesUsually_correct,
  HowManyWomen_correct,
  SenMajLeader_correct,
  TermLength_correct,
  
  # The same six items as above, repeated so that they can also be paired 
  # with the *_numROs variables (positions 7-12). [2019 09 06]
  ChiefJustice_correct,
  HowManyJusticesCurrently_correct,
  HowManyJusticesUsually_correct,
  HowManyWomen_correct,
  SenMajLeader_correct,
  TermLength_correct,
  
  # Now considering the six items for which we varied only the number of 
  # response options (positions 13-18).  [2019 09 06]
  ProcedureForChoosingJustices_correct,
  WatchLawyersArgue_correct,           
  FinalSay_correct,                   
  TieVoteProcedure_correct,           
  CourtPower_correct,                 
  JusticeRemoval_correct)  

# Keep only the respondents who have a survey weight. The result is stored 
# as its own data frame because its column names are used below to build 
# the formulas passed to getPercentages().
correctVarsDF <- filter(data.frame(correctVars), !is.na(originalData$weight))

# Survey design object (DO = "design object") over the retained respondents.
# The weights are supplied only when USE_WEIGHTS is TRUE; otherwise 
# svydesign() receives weights = NULL.
correctVarsDO <- svydesign(
  ids     = ~1,
  data    = correctVarsDF,
  weights = if (USE_WEIGHTS) na.omit(originalData$weight) else NULL)
    

# List of variables indicating the conditions to which subjects were assigned.
# Entry i is paired, by position, with entry i of correctVars when 
# percentCorrect is computed below, so the order here must match the order 
# of correctVars.
conditionVars <- llist(  
  
  # Factor variables for questions that had five conditions.
  ChiefJustice_condition,
  HowManyJusticesCurrently_condition,
  HowManyJusticesUsually_condition,
  HowManyWomen_condition,
  SenMajLeader_condition,
  TermLength_condition,
  
  # Numeric variables for the six questions listed above.
  ChiefJustice_numROs,
  HowManyJusticesCurrently_numROs,
  HowManyJusticesUsually_numROs,
  HowManyWomen_numROs,
  SenMajLeader_numROs,
  TermLength_numROs,

  # Numeric variables for questions that had only "short" and "long" conditions.
  JusticesChosen_numROs,          
  WatchLawyersArgue_numROs,       
  ConflictOverMeaning_numROs,     
  IfJusticesSplit_numROs,         
  CourtPowerDescription_numROs,   
  JusticeRemoval_numROs,     

  labels = FALSE) %>%

  
  # Convert every variable to a factor. levels() and nlevels() are applied 
  # to these factors below to build questionNames and dataToPlot.
  lapply(., factor) 


# Drop subjects who have no weight variable. (These subjects are also 
# missing demographic information.) The condition variables themselves are 
# never weighted, but the same filter is applied to correctVarsDF, and the 
# columns of the two data frames are used together below.
conditionVarsDF <- filter(data.frame(conditionVars), !is.na(originalData$weight))
  
  

# LIST INDICATING THE SHARE ANSWERING CORRECTLY FOR EACH QUESTION
# "percentCorrect" is a list with one element for each element of 
# correctVars. Each element is a named vector giving the share answering 
# correctly in each condition. The values are proportions at this point; 
# they are converted to percentages further below.
getPercentages <- function (varFormula, myFac, designObject) {
  # Use svyby() with svymean() to summarize the variable named in 
  # "varFormula" within each level of "myFac".
  #
  # Args:
  #   varFormula:   one-sided formula naming the variable to summarize.
  #   myFac:        factor that defines the groups; passed to svyby() as 
  #                 its "by" argument.
  #   designObject: survey design object passed to svyby().
  #
  # Returns:
  #   The third column of the table returned by svyby(), as a vector whose 
  #   names are the row names of that table.
  #
  # NOTE(review): which statistic sits in column 3 depends on how svymean() 
  # expands the variable (e.g., a single numeric column versus one column 
  # per level of a logical or factor). Confirm that column 3 is the 
  # estimated share correct and not a standard error.
  byCondition <- svyby(varFormula, myFac, designObject, svymean)
  setNames(byCondition[, 3], rownames(byCondition))
}

# For each column of correctVarsDF, summarize that column within each level 
# of the condition variable that sits in the same position of 
# conditionVarsDF. The result is an unnamed list with one named vector per 
# element of correctVars.
percentCorrect <- lapply(seq_along(correctVars), function(j) {
  getPercentages(
    formula(paste0("~", names(correctVarsDF)[j])),
    conditionVarsDF[[j]],
    correctVarsDO)
})


# One row per question-by-condition pair. questionNames repeats each 
# variable name once per level of its factor and strips the suffix, e.g., 
# it converts "ChiefJustice_condition" to "ChiefJustice".
#   vapply() and lapply() are used instead of sapply() so that the result 
# types do not depend on the data: sapply(conditionVars, levels) would 
# return a matrix, not a list, if every factor had the same number of levels.
questionNames <- rep(names(conditionVars), vapply(conditionVars, nlevels, integer(1))) %>% 
  gsub('_condition|_numROs', '', .)
dataToPlot <- tibble(
  question          = questionNames,  # ChiefJustice, TermLength, etc.
  responseOptionSet = unlist(lapply(conditionVars, levels)),
  percentCorrect    = unlist(percentCorrect))  


# FILTER OUT THE "SUPER-EASY" AND "OE" CONDITIONS
# Drop every row whose condition label contains "superEasy" and every row 
# whose condition is exactly "OE" (presumably the open-ended condition).
dataToPlot <- filter(
  dataToPlot,
  !grepl('superEasy', responseOptionSet),
  responseOptionSet != "OE")



# ADD ROWS FOR MEAN PERCENT CORRECT, THEN ORDER THE FACTOR LEVELS
# For each response-option condition that is still in the data, append one 
# row (question = "mean") that holds the mean of percentCorrect across all 
# questions that have that condition.
#   Note: these means are not weighted by sample size. For example, even 
# though fewer people were assigned to the open-ended "How many justices 
# currently" question than to the open-ended "How many women" question, the 
# two questions are given equal weight. It probably doesn't make a  
# difference -- but even so, we should probably change it.  [2020 01 17]
dataToPlot <- group_by(dataToPlot, responseOptionSet)
dataToPlot <- bind_rows(
  dataToPlot,
  dplyr::summarize(dataToPlot, question = "mean", percentCorrect = mean(percentCorrect)))

# Turn both identifier columns into factors with explicit level orders. The 
# order of the response-option conditions is the one wanted for the x axes 
# of the panels; the order of the questions is for convenience when viewing 
# the data frame and determines the row order used further below.
dataToPlot <- mutate(
  ungroup(dataToPlot),
  responseOptionSet = parse_factor(
    x = responseOptionSet,
    levels = qw("3:easy 5:easy 3:hard 5:hard OE 3 5 easy hard")),
  question = parse_factor(
    x = question,
    levels = qw("ChiefJustice HowManyJusticesCurrently HowManyJusticesUsually 
                 HowManyWomen SenMajLeader TermLength 

                 ConflictOverMeaning CourtPowerDescription IfJusticesSplit
                 JusticeRemoval JusticesChosen WatchLawyersArgue

                 mean")))



# SORT THE ROWS AND RESCALE "percentCorrect"
# Sort by question and then by response-option condition (for convenience 
# when examining the data frame), and multiply percentCorrect by 100 to 
# convert it from proportions to percentages.
dataToPlot <- dataToPlot %>%
  arrange(question, responseOptionSet) %>%
  mutate(percentCorrect = percentCorrect * 100)



# **************************************************************************
# FILTER AND SORT THE DATA FRAME ####
# **************************************************************************
# Save, from dataToPlot, only the info that we need to make the table: one 
# row per question, holding the smallest ("min") and largest ("max") 
# percentage correct across that question's conditions. Rows come out in 
# the order of the levels of "question".
#   The extremes are computed directly with min() and max(). Selecting the 
# rows that match range() and then labeling them c("min", "max") would fail 
# whenever a question did not have exactly two such rows, e.g., when two 
# conditions tie for the minimum or when the minimum equals the maximum.
dataToPlot <- dataToPlot %>%
  group_by(question) %>%
  summarize(min = min(percentCorrect), max = max(percentCorrect)) %>%
  ungroup()

# The table has two tiers followed by the "mean" row. Rows 1-6 are the 
# questions that had four different closed-ended conditions; rows 7-12 are 
# the questions that had only two conditions ("three response options" or 
# "five response options"); row 13 is the mean. Within each tier, sort the 
# questions from the largest range of percentages correct to the smallest.  
# [2020 08 07]
dataToPlot <- bind_rows(
  arrange(dataToPlot[1:6, ],  desc(max - min)),
  arrange(dataToPlot[7:12, ], desc(max - min)),
  dataToPlot[13, ])
  


# **************************************************************************
# MAKE THE LATEX TABLE ####
# **************************************************************************
# Row labels for the LaTeX table. The labels are looked up from the question 
# identifiers in dataToPlot, so each label follows its own row no matter how 
# the rows were sorted above. (A fixed vector of labels written in the 
# expected sort order would silently mislabel rows if the ranges, and 
# therefore the sort order, ever changed.)
questionLabels <- c(
  HowManyWomen             = "How many women",
  HowManyJusticesCurrently = "How many justices currently",
  HowManyJusticesUsually   = "How many justices usually",
  SenMajLeader             = "Name of Senate Majority Leader",
  ChiefJustice             = "Name of Chief Justice",
  TermLength               = "Term length of justice",
  
  JusticeRemoval           = "Possible to remove justices",
  ConflictOverMeaning      = "Final say over Constitution",
  WatchLawyersArgue        = "Possible to watch lawyers argue",
  JusticesChosen           = "How are justices chosen",
  CourtPowerDescription    = "Best description of Court power",
  IfJusticesSplit          = "What happens if tie decision",
  
  mean                     = "Mean")

# as.character() is required: indexing by the factor itself would use its 
# integer codes rather than the question identifiers.
rowNames <- unname(questionLabels[as.character(dataToPlot$question)])

# Fail loudly if a question has no label.
stopifnot(!anyNA(rowNames))


# Two-row column header: "minimum" and "maximum" on the first row, and 
# "% correct" under each of them on the second row.
colNames <- list(
  c("minimum", "maximum"),
  rep("\\% correct", 2))

caption <- '\\textit{Ranges of percentages answering correctly across closed-ended conditions.} We calculated the percentage of subjects answering each question correctly in each closed-ended condition. This table reports, for each question, the range of percentages correct, where the range is taken across closed-ended conditions.\\newline Within each tier, questions are ordered from those with the largest range to those with the smallest. The first six questions had four closed-ended conditions: long-difficult, short-difficult, long-easy, and short-easy. The next six questions had only two conditions: long (five response options) and short (three response options).'

# Build the LaTeX table from the "min" and "max" columns only. The 
# "question" column is dropped because the row labels come from rowNames.
table_percentCorrectRanges <- latexTable(
  mat                     = as.matrix(select(ungroup(dataToPlot), -question)),
  SE_table                = FALSE,
  colNames                = colNames,
  rowNames                = rowNames,
  headerFooter            = TRUE,
  spaceBetweenColNameRows = '-.065in',
  decimalPlaces           = 0,
  caption                 = caption,
  landscape               = FALSE,
  floatPlacement          = 'p',
  commandName             = 'TabAppendixPercentCorrectRanges',
  callCommand             = FALSE)


# Center the columns: replace each occurrence of the column specification 
# below (presumably emitted by latexTable()) with a plain "c". The pattern 
# is matched literally because fixed = TRUE.
table_percentCorrectRanges <- gsub(
  pattern     = ">{{\\hspace*{0em}}}N{2}{0}",
  replacement = "c",
  x           = table_percentCorrectRanges,
  fixed       = TRUE)


# Insert vertical space between tiers.
insertTierSpace <- function(tableLines, rowLabel) {
  # Insert an \addlinespace line immediately before the first element of 
  # "tableLines" that contains "rowLabel" (matched as literal text).
  #
  # Args:
  #   tableLines: character vector holding the lines of the LaTeX table.
  #   rowLabel:   text of the row label that starts the next tier.
  #
  # Returns "tableLines" with one extra element. Stops with an explicit 
  # error if the label is not found, rather than failing later with a 
  # subscript error caused by min() of an empty match.
  hits <- grep(rowLabel, tableLines, fixed = TRUE)
  if (length(hits) == 0) {
    stop("Row label not found in LaTeX table: ", rowLabel, call. = FALSE)
  }
  append(tableLines, "        \\addlinespace[.15in]", after = hits[1] - 1)
}

# The first tier has six rows, so rowNames[7] is the label of the first row 
# of the second tier. The "Mean" row follows the second tier.
table_percentCorrectRanges <- insertTierSpace(table_percentCorrectRanges, rowNames[7])
table_percentCorrectRanges <- insertTierSpace(table_percentCorrectRanges, 'Mean')


# Write the LaTeX table to a file named from filenameStem. Only the .tex 
# file is requested (writeTex = TRUE, writePDF = FALSE), and an existing 
# file is overwritten.
latexTablePDF(
  latexTable         = table_percentCorrectRanges,
  outputFilenameStem = filenameStem,
  writeTex           = TRUE,
  writePDF           = FALSE,
  overwriteExisting  = TRUE,
  openPDFOnExit      = FALSE,
  firstPageEmpty     = FALSE,
  continuedFloat     = FALSE,
  container          = FALSE)
